import requests
from lxml import etree
# Default HTTP request headers shared by the fetch helpers below: a desktop
# Chrome User-Agent string sent with every request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/68.0.3440.84 Safari/537.36"
    ),
}

def write_local_file(filename, text):
    """Write ``text`` to ``filename`` as UTF-8, replacing any existing content.

    Args:
        filename: Path of the file to create or overwrite.
        text: String content to write.
    """
    # The ``with`` block closes the file on exit (even on error), so no
    # explicit ``close()`` call is needed.
    with open(filename, "w", encoding="utf-8") as fh:
        fh.write(text)

def test_baidu():
    """Fetch a Baidu search results page and save it to ``baidu_test.html``.

    Sends a GET request to ``https://www.baidu.com/s`` with the query
    parameter ``wd`` and the module-level ``headers``, then writes the
    response text to a local file via ``write_local_file``.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when the server does not respond within the timeout.
    """
    url = "https://www.baidu.com/s"
    params = {
        'wd': '香港'
    }
    # requests has no default timeout; without one an unresponsive server
    # would block this call forever.
    response = requests.get(url=url, params=params, headers=headers, timeout=10)
    write_local_file("baidu_test.html", response.text)

def test_douban():
    """Fetch the Douban home page and print image URLs and titles.

    Downloads ``https://www.douban.com/`` using the module-level ``headers``,
    parses the HTML with lxml, and for every ``<li>`` directly under a
    ``<ul class="time-list">`` prints the image ``src`` values and the text
    of the ``<a class="title">`` links.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when the server does not respond within the timeout.
    """
    url = "https://www.douban.com/"
    # requests has no default timeout; without one an unresponsive server
    # would block this call forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Uncomment to dump the raw page to disk for debugging:
    # write_local_file("douban_test.html",response.text)

    # Note: xpath() always returns a list, even for a single match.
    html_doc = response.text
    html_tree = etree.HTML(html_doc)
    # First locate the block-level container elements...
    book_time_list = html_tree.xpath('//ul[@class="time-list"]/li')
    # ...then query each container's children with paths relative to it.
    for book_time in book_time_list:
        book_img_url = book_time.xpath('./a/img/@src')    # "./" = from the current node down
        book_title_content = book_time.xpath('./a[@class="title"]/text()')
        print(f"book_image_url : {book_img_url}")
        # Label previously read "ook_title_content" (missing leading "b").
        print(f"book_title_content : {book_title_content}")

# Script entry point: run the Douban fetch-and-print routine.
# NOTE(review): this call is not guarded by `if __name__ == "__main__":`, so it
# also executes (and performs a network request) whenever the module is imported.
test_douban()